
* ---------------------------------------------------------------------------
* Session setup: no paging, close any log left open, start from empty memory.
* ---------------------------------------------------------------------------
set more off
capture log close
clear all
set niceness 10
set processor 4

* ---------------------------------------------------------------------------
* Project directories, referenced below as $path1 ... $path4
*   path1 : raw census files that are read in
*   path2 : working data that this script reads and writes
*   path3 : destination of the log file
*   path4 : figures directory (not referenced in this part of the script)
* ---------------------------------------------------------------------------
global path1 "/Users/Wei/Dropbox/Census"
global path2 "/Users/Wei/Dropbox/Fertility/Workingdata"
global path3 "/Users/Wei/Dropbox/Fertility/Results"
global path4 "/Users/Wei/Dropbox/Fertility/Figures"

log using "$path3/census_data.log", replace


* ===========================================================================
* 1990 census: harmonise variable names with the other census years and save
* a temporary extract (marr_90_temp) that is appended further below.
* ===========================================================================
use "$path1/china1990.dta", clear

* Household id. Built first because it uses county and hh_ty under their
* original names; missing hhcode* values are encoded as 9999 so that
* egen group() does not return missing for those households.
mvencode hhcode*, mv(9999)
egen hhid = group(province prefect county hhcode* hh_ty hh_no)

* Rename to the common naming scheme
rename birthyear year_birth
rename relate    relation
rename marst     maritus
rename working   ifwork
rename chborn    n_birth
rename chsurv    n_survive
rename ceb_m     n_mbirth
rename ceb_f     n_fbirth
rename ceb_sm    n_mchild
rename ceb_sf    n_fchild
rename occu      occ
rename industry  ind
rename race      ethnic

* Order matters here: hhtype must be freed up before hh_ty can take its name
rename hhtype hktype
rename hh_ty  hhtype

* Replace the original county variable with the numeric county code
drop county
destring countycode, replace force
rename countycode county

* Person id within the extract
egen id = group(hhid county relation)

replace year = 1990

* prov takes res_85 when res_85 > 2 and falls back to province otherwise
* (missing res_85 also satisfies "> 2", yields missing prov, and is then
* filled from province)
generate prov = res_85 if res_85 > 2
replace prov = province if missing(prov)

keep id hhid prov county province sex ethnic hktype educ year_birth age ///
     relation maritus married ifwork n_birth n_survive ind occ year     ///
     n_mbirth n_fbirth n_mchild n_fchild hhtype
rename county region

summarize

* Working indicator, missing when ifwork is missing
generate work = ifwork == 1 if !missing(ifwork)

* Collapse education codes into five levels
recode educ (0/1 = 1) (2 = 2) (3 = 3) (4 = 4) (5/9 = 5)

order hhid id region prov*
save "$path2/marr_90_temp", replace


* ===========================================================================
* 2000 census: keep the needed questionnaire items, rename them to the common
* scheme and save a temporary extract (marr_00_temp).
* ===========================================================================
use "$path1/census2000_nodup.dta", clear
tabulate h02

keep id r01 r02 r03 r041 r042 r05 r07 r061 r062 r081 r082 /// region info & demographics
     r151 r152 r16 r19 r20 r18                            /// education, SES and occupation
     r17 r23 r241 r242 r251 r252 r253 r254                /// marriage & fertility
     ra0 ra1 r06*                                         // hukou

* id is a string: its first 2 characters give province, its first 4 give region
generate province = real(substr(id, 1, 2))
generate region   = real(substr(id, 1, 4))
egen hhid = group(id)

* Rename questionnaire items to the common naming scheme
rename r01  name
rename r02  relation
rename r03  sex
rename r041 year_birth
rename r042 moth_birth
rename r05  ethnic
rename r07  hktype
rename r151 educ
rename r16  ifend_educ
rename r19  ind
rename r20  occ
rename r23  maritus
rename r241 year_mar1
rename r242 month_mar1
rename r251 n_mbirth
rename r252 n_fbirth
rename r253 n_mchild
rename r254 n_fchild
rename r17  ifwork
rename r18  worktime
rename ra0  city_flag
summarize

* prov_hk starts as province and is overwritten by r063 wherever r063 is
* neither zero nor missing
generate prov_hk = province
replace prov_hk = r063 if r063 != 0 & r063 < .
*keep if $age_sample
drop if year_mar1 < 1900
generate age = 2000 - year_birth

* Household head or spouse (relation 0 or 1), by sex
generate husband = sex == 1 & inlist(relation, 0, 1)
generate wife    = sex == 2 & inlist(relation, 0, 1)

generate married   = inlist(maritus, 2, 3) if !missing(maritus)
generate age_marr1 = year_mar1 - year_birth

* prov ends up equal to province: the prov_hk assignment is overwritten
* immediately by the replace
generate prov = prov_hk
replace prov = province

generate year = 2000

* Collapse education codes into five levels
recode educ (0/2 = 1) (3 = 2) (4 = 3) (5/6 = 4) (7/9 = 5)

generate work      = ifwork == 1 if !missing(ifwork)
generate n_birth   = n_mbirth + n_fbirth
generate n_survive = n_mchild + n_fchild

* Replace the string id with a numeric group id
egen iid = group(id)
drop id
rename iid id
save "$path2/marr_00_temp", replace


* ===========================================================================
* 2005 census: variables already carry the common names; derive the same
* indicators as for the other years and save marr_05_temp.
* ===========================================================================
use "$path1/china2005", clear // 2005 census
keep region hhid hhtype name relation sex year_birth moth_birth ethnic hktype ///
     nbroth nsister prov_5y_res literacy health                                ///
     educ ifend_educ ind occ maritus year_mar1 month_mar1                      ///
     n_mbirth n_fbirth n_mchild n_fchild age city_flag income ifwork hour_w prov_hk

summarize

* Rows whose city_flag cannot be read as a number are dropped
destring city_flag, replace force
drop if city_flag == .

* region = province * 100 + district
generate province = int(region/100)
generate distric  = region - province * 100

* Keep aged between 15 and 60 first
*keep if $age_sample
drop if year_mar1 < 1900

* When looking at marriage age, we may need to keep those not married first?
generate married   = inlist(maritus, 2, 3) if !missing(maritus)
generate age_marr1 = year_mar1 - year_birth

* Collapse education codes into five levels
recode educ (0/1 = 1) (2 = 2) (3 = 3) (4 = 4) (5/9 = 5)

* prov_hk == 0 is filled from province. prov itself ends up equal to
* province, because the last replace below overwrites the prov_hk assignment.
generate prov = province
replace prov_hk = prov if prov_hk == 0
replace prov = prov_hk

generate work = ifwork == 1 if !missing(ifwork)
replace prov = province

* Express hours worked in units of 8 hours, under the name used for 2000
rename hour_w worktime
replace worktime = worktime/8

generate n_birth   = n_mbirth + n_fbirth
generate n_survive = n_mchild + n_fchild
generate year      = 2005
save "$path2/marr_05_temp", replace


* ===========================================================================
* Pool the 2005, 2000 and 1990 extracts and derive person-level variables.
* ===========================================================================
use "$path2/marr_05_temp", clear 
append using "$path2/marr_00_temp"
append using "$path2/marr_90_temp"

* The per-year extracts are intermediate files; remove them once pooled
cap erase "$path2/marr_05_temp.dta"
cap erase "$path2/marr_00_temp.dta"
cap erase "$path2/marr_90_temp.dta"

drop if mi(year)
drop if mi(year_birth)

* wt is 1 for 1990 and 2000 rows and 4 for 2005 rows
gen wt = 1 if year == 1990 | year == 2000 
replace wt = 4 if year == 2005 
cap drop id 
gen male = sex == 1 

gen han = ethnic == 1 

* Ages at first marriage outside 15-60 are set to missing; age is recomputed
* uniformly from census year and birth year
replace age_marr1 = . if age_marr1 < 15 | age_marr1 > 60 
replace age = year - year_birth 

* hktype 0 and missing are both recoded to 9
replace hktype = 9 if hktype == 0 | mi(hktype)
*drop if hktype == 9

gen married_ever = maritus != 1 if !mi(maritus)

gen women = male == 0 
gen men = male == 1 

* Education dummies. Stata treats missing as larger than any number, so an
* unguarded "educ >= 4" would code people with unknown education as 1; the
* dummies are left missing in that case instead.
gen senior = educ >= 4 if !mi(educ)
gen college = educ >= 5 if !mi(educ)

* worktime only applies to people who work; hour_w rescales it by 8 for the
* two years that carry worktime (2000 and 2005)
replace worktime = . if work == 0 
gen hour_w = worktime * 8 if year == 2005 
replace hour_w = worktime *8 if year == 2000 

* Occupation codes for years up to 2000 have their last digit dropped so they
* line up with 2005; high_occ is defined for workers only
replace occ = int(occ/10) if year <= 2000
gen high_occ = occ <= 39  & occ !=0 if work == 1
replace occ = 11 if occ <= 11 &  occ !=0


* Late marriage: first married after 25; for people without a marriage age,
* never-married counts as late
cap drop late_marr
gen late_marr =  age_marr1 > 25  if !mi(age_marr1)
replace late_marr = married_ever == 0 if mi(age_marr1)
*replace late_marr = . if year == 1990

* Ever divorced: maritus 3 or 4 from 2000 on, maritus 4 in 1990; undefined
* for the never married
cap drop divorce_ever
gen divorce_ever= maritus == 3 | maritus == 4 if year >= 2000
replace divorce_ever = maritus == 4 if year == 1990
replace divorce_ever = . if married_ever == 0 

gen hlth = health == 1 if !mi(health)

* Child-death indicators, defined for women with at least one (male/female)
* birth and consistent birth/survivor counts
gen chd_death = n_birth - n_survive >= 1  if !mi(n_birth) & !mi(n_survive) & !(n_birth < n_survive) & n_birth >=1 & women == 1 
gen boy_death = n_mbirth - n_mchild >= 1  if !mi(n_birth) & !mi(n_survive) & !(n_birth < n_survive) & !mi(n_mbirth) & n_mbirth >= 1  & women == 1 
gen girl_death = n_fbirth - n_fchild >= 1  if !mi(n_birth) & !mi(n_survive) & !(n_birth < n_survive) & !mi(n_fbirth) & n_fbirth >= 1  & women == 1 

su late_marr married_ever 

* Survival rates in percent; rates above 100 are discarded and the overall
* rate is set to 0 for men
gen sur_rate= n_survive/n_birth *100
replace sur_rate = . if sur_rate > 100 
replace sur_rate = 0 if women == 0 
gen sur_boy = n_mchild/n_mbirth * 100
gen sur_girl = n_fchild/n_fbirth * 100

* Boy-minus-girl differences in births and in surviving children
gen n_birth_gender = n_mbirth - n_fbirth
gen n_child_gender = n_mchild - n_fchild
 
* Analysis sample: cohorts 1940-1980 observed at age 25 or older
keep if year_birth >= 1940 & year_birth <= 1980 & age >= 25
* ===========================================================================
* Attach the province-year fine each person faced at ages 0-39 and in the
* 1-3 years before birth, average it over age windows, and save marr_policy.
* ===========================================================================

* The census year is parked in year_temp because "year" is reused below as
* the merge key into fines_use
ren year year_temp

* keep(master match): without it every pass appends the fines_use rows that
* found no match in the sample; those rows were previously removed only by
* the later "drop if mi(wt)".
forvalues ag = 0(1)39{
    g year = year_birth + `ag' 
    merge m:1 prov year using "$path2/fines_use", keepusing(fine) keep(master match) nogen
    * fines are set to zero for years before 1979
    replace fine = 0 if year < 1979
    ren fine fine_age`ag'
    drop year
}

* Same lookup for the 1, 2 and 3 years before birth
forvalues ag = 1(1)3{
    g year = year_birth - `ag' 
    merge m:1 prov year using "$path2/fines_use", keepusing(fine) keep(master match) nogen
    replace fine = 0 if year < 1979
    ren fine fine_f`ag'
    drop year
}

 
ren year_temp year

* Row means of the fine over age windows (rowmean ignores missing entries)
egen fine_f2_5 = rowmean(fine_f2 fine_f1 fine_age0 fine_age1-fine_age5)
egen fine_6_10 = rowmean(fine_age6-fine_age10)
egen fine_11_15 = rowmean(fine_age11-fine_age15)
egen fine_16_20 = rowmean(fine_age16-fine_age20)
egen fine_21_25 = rowmean(fine_age21-fine_age25)

egen fine_6_12 = rowmean(fine_age6-fine_age12)
egen fine_13_19 = rowmean(fine_age13-fine_age19)
egen fine_20_25 = rowmean(fine_age20-fine_age25)

egen fine_6_15 = rowmean(fine_age6-fine_age15)
egen fine_16_25 = rowmean(fine_age16-fine_age25)

* Drop rows without fine data at ages 21-25 or without a weight, and rows
* whose prov / prov_hk code is 70 or above (missing prov is dropped too,
* since missing compares as larger than 70)
drop if mi(fine_21_25)
replace year = int(year)
drop if mi(wt)
drop if prov > 70 
replace prov_hk = prov if mi(prov_hk)
drop if prov_hk >= 70 

compress
save "$path2/marr_policy",replace 

* ===========================================================================
* Child mortality data: restrict marr_policy to women with consistent birth
* and survivor counts and at least one birth, keeping only what the
* child-level reshape below needs.
* ===========================================================================
set more off

use "$path2/marr_policy", clear
summarize
generate mort_rate = 100 - sur_rate if women == 1

egen fine_6_20 = rowmean(fine_age6-fine_age20)

* Regressor lists. NOTE(review): fine_26_30 and fine_21_30 are not created
* anywhere in this script - confirm before using FINE_ALL / FINE_VAR_4.
global FINE_ALL   "fine_6_15 fine_16_20 fine_21_25 fine_26_30"
global FINE_VAR_4 "fine_6_15 fine_16_20 fine_21_30"
global FINE_VAR_3 "fine_6_15 fine_16_20 fine_21_25"
global FINE_VAR_1 "fine_6_20"

* Share of Han members within each year-household-prov-region cell
egen han_p = mean(han), by(year hhid prov region)

* Created only if not already present in marr_policy (capture swallows the
* "already defined" error)
capture egen fine_6_15  = rowmean(fine_age6-fine_age15)
capture egen fine_16_20 = rowmean(fine_age16-fine_age20)
capture egen fine_21_25 = rowmean(fine_age21-fine_age25)

* Further age windows, each given as "first-age last-age"
foreach span in "6 18" "6 22" "8 20" "4 20" "1 20" "4 22" {
    local lo : word 1 of `span'
    local hi : word 2 of `span'
    egen fine_`lo'_`hi' = rowmean(fine_age`lo'-fine_age`hi')
}

replace high_occ = 0 if high_occ == . & !missing(work)

drop if han_p > 0 & han_p < 1 // drop mixed households
drop if women == 0

* Number of deceased children; negative differences are treated as missing.
* Computed before the counts of never-married women are zeroed below.
generate n_death = cond(n_birth - n_survive < 0, ., n_birth - n_survive)

* Never-married women are assigned zero births and zero surviving children
foreach v of varlist n_birth n_mbirth n_fbirth n_survive n_mchild n_fchild {
    replace `v' = 0 if married_ever == 0
}

* Consistency filters on the fertility counts
drop if missing(n_birth) | missing(n_death)
drop if n_birth != n_mbirth + n_fbirth
drop if n_mchild > n_mbirth | n_fchild > n_fbirth

drop if n_birth == 0 // no children
generate urban_hk = hktype == 2

keep prov year year_birth n_mbirth n_fbirth n_mchild n_fchild       ///
     fine_1_20 fine_6_20 fine_6_18 fine_6_15 fine_6_22 fine_16_20   ///
     fine_8_20 fine_4_20 fine_4_22 fine_21_25 nbroth nsister han_p urban_hk

* hhid is not in the keep list above; a fresh row id takes its name and
* serves as the reshape key
generate hhid = _n
 
 * ---------------------------------------------------------------------------
 * Expand each record into one row per birth and flag each row's sex (boy) and
 * death status (die) from the four counts n_mbirth, n_fbirth, n_mchild and
 * n_fchild. Done in two passes (at most 5 births / more than 5 births) so the
 * large majority of records are reshaped with only 5 placeholder columns.
 *
 * Within a record the rows are laid out by "order" as:
 *   1 .. cut1          boys, alive   (cut1 = n_mchild)
 *   cut1+1 .. cut2     boys, died    (cut2 = n_mbirth)
 *   cut2+1 .. cut3     girls, alive  (cut3 = n_mbirth + n_fchild)
 *   cut3+1 .. cut4     girls, died   (cut4 = n_mbirth + n_fbirth)
 * ---------------------------------------------------------------------------
 preserve 
keep if n_mbirth + n_fbirth <= 5
* child1-child5 are empty placeholders whose only job is to give reshape
* long one row per slot
forvalues i = 1(1)5{
gen child`i' = .
}
reshape long child, i(hhid prov year year_birth n_mbirth n_fbirth n_mchild n_fchild) j(order)
gen cut1 = n_mchild 
gen cut2 = n_mbirth 
gen cut3 = n_mbirth+n_fchild
gen cut4 = n_mbirth+n_fbirth
* slots beyond the record's total number of births are discarded
drop if order > cut4 
gen boy = . 
gen die = . 
replace boy = 1 if order <= cut2 
replace die = 0 if order <= cut1 
replace die = 1 if order > cut1 & order <= cut2 
replace boy = 0 if order > cut2 & order <= cut4 
replace die = 0 if order > cut2 & order <= cut3 
replace die = 1 if order > cut3 & order <= cut4
save "$path2/temp_child_1",replace 
restore 

* Second pass: records with more than 5 births, using 18 placeholder slots.
* NOTE(review): a record with more than 18 births would only get 18 rows;
* confirm the maximum in the data.
 preserve 
keep if n_mbirth + n_fbirth > 5
forvalues i = 1(1)18{
gen child`i' = .
}
reshape long child, i(hhid prov year year_birth n_mbirth n_fbirth n_mchild n_fchild) j(order)
gen cut1 = n_mchild 
gen cut2 = n_mbirth 
gen cut3 = n_mbirth+n_fchild
gen cut4 = n_mbirth+n_fbirth
drop if order > cut4 
gen boy = . 
gen die = . 
replace boy = 1 if order <= cut2 
replace die = 0 if order <= cut1 
replace die = 1 if order > cut1 & order <= cut2 
replace boy = 0 if order > cut2 & order <= cut4 
replace die = 0 if order > cut2 & order <= cut3 
replace die = 1 if order > cut3 & order <= cut4
save "$path2/temp_child_2",replace 
restore 
* Stack the two passes and drop the helper columns.
* NOTE(review): the two ranges below (n_mbirth-n_fchild, child-cut4) depend on
* the variable order in memory after reshape; any variable positioned between
* the range endpoints is dropped as well.
use "$path2/temp_child_1", clear 
append using "$path2/temp_child_2"
drop n_mbirth-n_fchild order child-cut4
save "$path2/child_gender_mort",replace 
cap erase "$path2/temp_child_1.dta"
cap erase "$path2/temp_child_2.dta"



* ===========================================================================
* Child education sample: load raw_data and attach the fines the mother faced
* at ages 0-25 and in the 1-3 years before her birth, keyed on the province
* variable "province" and her birth year (year - h_m_age).
* ===========================================================================
use "$path2/raw_data", clear 
su 
drop fine* time*

* The original prov is parked in prov_temp; "province" is used as the merge
* key into fines_use under the name prov
ren prov prov_temp
gen prov = province
gen m_birth_year = year - h_m_age
tab m_birth_year
keep if m_birth_year >= 1940 & m_birth_year <= 1980

* "year" is reused below as the merge key, so the survey year is parked
ren year year_temp

* Highest birth order and highest age within each hhid-year-prov-urban cell
egen max_order = max(order), by(hhid year_temp prov urban)
egen max_age = max(age), by(hhid year_temp prov urban)

* keep(master match): without it every pass appends the fines_use rows that
* found no match in the sample; those rows were previously removed only by
* the later "keep if max_age <= 16".
qui:{
forvalues ag = 0(1)25{
    g year = m_birth_year + `ag' 
    merge m:1 prov year using "$path2/fines_use", keepusing(fine) keep(master match) nogen
    * fines are set to zero for years before 1979
    replace fine = 0 if year < 1979
    ren fine fine_age`ag'
    drop year
}

forvalues ag = 1(1)3{
    g year = m_birth_year - `ag' 
    merge m:1 prov year using "$path2/fines_use", keepusing(fine) keep(master match) nogen
    replace fine = 0 if year < 1979
    ren fine fine_f`ag'
    drop year
}
}

* Row means of the fine over age windows (rowmean ignores missing entries)
egen fine_6_20 = rowmean(fine_age6-fine_age20)
egen fine_6_15 = rowmean(fine_age6-fine_age15)
egen fine_16_20 = rowmean(fine_age16-fine_age20)
egen fine_21_25 = rowmean(fine_age21-fine_age25)
egen fine_6_18 = rowmean(fine_age6-fine_age18)
egen fine_6_22 = rowmean(fine_age6-fine_age22)
egen fine_8_20 = rowmean(fine_age8-fine_age20)
egen fine_4_20 = rowmean(fine_age4-fine_age20)
egen fine_1_20 = rowmean(fine_age1-fine_age20)

egen fine_4_22 = rowmean(fine_age4-fine_age22)

ren year_temp year

* ===========================================================================
* Fines around the child's own birth year:
*   fine_2 = fine two years before birth
*   fine_1 = fine one year before birth
*   fine_0 = fine in the birth year
*   fine   = row mean of fine_1 and fine_0
*
* Fix: fine_1 and fine_0 were previously both looked up at birth_year + 1,
* which made them identical and reduced "fine" to a single year's value.
* The lags now follow the variable names, in line with fine_2 at
* birth_year - 2. NOTE(review): this changes the values of fine_1, fine_0
* and fine - confirm these are the intended years.
* ===========================================================================
ren birth_year by_temp 
ren year yr_temp

* "year" is the merge key into fines_use; it is reset for each lag.
* keep(master match) stops unmatched fines_use rows from being appended.
gen year = .
foreach lag of numlist 2 1 0 {
    replace year = by_temp - `lag'
    merge m:1 prov year using "$path2/fines_use", keepusing(fine) keep(master match) nogen
    * fines are set to zero for years before 1979
    replace fine = 0 if year < 1979
    ren fine fine_`lag'
}
drop year 

ren yr_temp year 
ren by_temp birth_year 
egen fine = rowmean( fine_1 fine_0)

* Keep cells whose oldest member is at most 16 (rows with missing max_age
* are dropped as well)
keep if max_age <= 16

* Restore the original prov that was parked in prov_temp
drop prov
ren prov_temp prov 

* ===========================================================================
* Child-level covariates and education outcomes; saves child_edu and closes
* the log.
* ===========================================================================
gen high_order = max_order >= 2 

* rural comes from hktype for 1990 onwards and from urban for 1982
replace rural = hktype == 1 if year >= 1990
replace rural = urban == 0 if year == 1982

* Education is not used for children aged 6 or younger
replace educ = . if age <= 6
gen girl = sex == 2 

* Birth order with all orders from 2 to 20 pooled into 2
recode order (2/20 = 2), gen(b_order)

* Both parents Han / both parents non-Han
gen han_hh = h_m_han == 1 & h_f_han ==1
gen non_han_hh = h_m_han == 0 & h_f_han ==0

* Age-appropriate schooling: level >= 2 at ages 7-11, >= 3 at ages 12-14,
* >= 4 from age 15. Stata treats missing as larger than any number, so
* without the explicit guards a child with missing educ (or missing age)
* would pass the ">=" tests and be coded 1. Such children are now left
* missing.
cap drop elig_edu
gen elig_edu = 0 if age >= 7 & !mi(age) & !mi(educ)
replace elig_edu = 1 if educ >= 2 & !mi(educ) & age >= 7 & age < 12 
replace elig_edu = 1 if educ >= 3 & !mi(educ) & age >= 12 & age < 15
replace elig_edu = 1 if educ >= 4 & !mi(educ) & age >= 15 & !mi(age)

* Attainment dummies for children aged 7+, missing when educ or age is
* missing (same missing-value guard as above)
cap drop illiter  liter junior senior
gen illiter = educ == 1  if age >= 7 & !mi(age) & !mi(educ)
gen liter = educ >= 2 if age >= 7 & !mi(age) & !mi(educ)
gen junior = educ >= 3 if age >= 7 & !mi(age) & !mi(educ)
gen senior = educ >= 4  if age >= 7 & !mi(age) & !mi(educ)



* wt is 4 for 2005 rows and 1 otherwise
gen wt = 1 
replace wt = 4 if year == 2005 

save "$path2/child_edu",replace 

log close
